import pandas as pd
import os
from os import path


def main(src: str, dst: str) -> None:
    """Clean every CSV export found in ``src`` and write the result to ``dst``.

    Each entry of ``src`` is read as a header-less, GBK-encoded CSV with nine
    columns. Rows containing any missing value are discarded, the ninth
    column (named ``'0'``) is dropped, the remaining rows are sorted by
    ``trade_date`` and then ``trade_min``, and the frame is written as UTF-8
    (without the index) under the same file name in ``dst``. Entries that
    cannot be read as CSV are skipped. A one-line progress indicator is
    printed after each file that was written.

    Args:
        src: Existing directory holding the raw export files.
        dst: Output directory; created together with any missing parents.

    Raises:
        AssertionError: If ``src`` does not exist.
    """
    # Raised explicitly (instead of a bare ``assert``) so the check is not
    # stripped under ``python -O``; exception type and message are unchanged.
    if not path.exists(src):
        raise AssertionError('源文件夹路径不存在')
    # makedirs also creates missing parent folders (e.g. ``data`` for
    # ``data\5min``), which a plain os.mkdir would refuse to do.
    os.makedirs(dst, exist_ok=True)

    file_list = os.listdir(src)
    # date, minute, open, high, low, close, volume, amount, plus one trailing
    # column (named '0') that is discarded below.
    col_names = ['trade_date', 'trade_min', 'open', 'high', 'low', 'close',
                 'vol', 'amount', '0']
    fin = 0
    length = len(file_list)
    for file_name in file_list:
        file_path = path.join(src, file_name)
        # The TDX exports are GBK-encoded.
        try:
            df = pd.read_csv(file_path, header=None, names=col_names,
                             encoding='gbk')
        except Exception:
            # Best effort: skip anything that is not a readable CSV
            # (sub-directories, other encodings, empty files, ...) while
            # still letting KeyboardInterrupt / SystemExit propagate.
            continue
        # Drop incomplete rows (evaluated over all nine columns), remove the
        # unused column, then sort. sort_values returns a new frame, so the
        # result must be assigned for the ordering to take effect.
        df = (
            df.dropna()
            .drop(columns='0')
            .sort_values(['trade_date', 'trade_min'])
        )
        # Save to dst under the original file name.
        file_path = path.join(dst, file_name)
        df.to_csv(file_path, index=False, encoding='utf-8')
        fin += 1
        print(f'\r{file_name} 处理中 {fin}/{length}   ', end='', flush=True)
        
        

# Output folder for the cleaned CSV files (a relative path).
dst = r'data\5min'
# Input folder containing the raw exports to convert.
src = r'E:\new_tdx\T0002\期货主次连5分钟220414-230310'
# Run the conversion as soon as this file is executed.
main(src=src, dst=dst)